4  Explore Sub-Daily Data

Purpose: Explore data at sub-daily temporal resolutions (e.g., 15-min and 1-hour time steps) and compare outputs to daily data.

Given that streamflow can change so quickly in small, headwater streams, are we missing a key part of the story by using flow data summarized as daily means? Using daily mean flow reduces the range of values, particularly at the upper end (i.e., high flows), and so we may be overlooking the g~G relationship at very high flows. (Note: analysis of 15-min data is limited, as Montana and Wyoming data are collected at the hourly timescale.)

4.1 Data

4.1.1 Load data

Bring in site info and sub-daily data

Code
# Site information and locations: read the site table, convert to an sf
# point layer (WGS84 lon/lat), and render an interactive map colored by
# site designation
siteinfo <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/EcoDrought_SiteInformation.csv")
siteinfo_sp <- siteinfo %>%
  st_as_sf(coords = c("long", "lat"), crs = 4326)
mapview(siteinfo_sp, zcol = "designation")
Code
# flow/yield (and temp) data 
dat_sub <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/EcoDrought_FlowTempData_Raw_ECODandNWIS.csv") 

# little-g (headwater/tributary) site names, defined once so the filter below
# cannot drift out of sync with itself
sites_little <- c("West Brook Lower", "Mitchell Brook", "Jimmy Brook", 
                  "Obear Brook Lower", "West Brook Upper", "West Brook Reservoir", 
                  "Sanderson Brook", "Avery Brook", "West Whately Brook")

# little-g records: raw (sub-daily) flow plus drainage area
dat_little <- dat_sub %>% 
  filter(site_name %in% sites_little) %>% 
  select(site_name, datetime, flow, area_sqmi)

# big-G record: the downstream NWIS gage
dat_big <- dat_sub %>% 
  filter(site_name == "West Brook NWIS") %>% 
  select(site_name, datetime, flow, area_sqmi)

Check time zones

Code
# time zone attribute of the little-g datetimes (expect "UTC")
unique(tz(dat_little$datetime))
[1] "UTC"
Code
# time zone attribute of the big-G datetimes (expect "UTC", matching little g)
unique(tz(dat_big$datetime))
[1] "UTC"

Organize 15-min data

Code
# Assemble the 15-min series: stack little-g and big-G records, order the
# site factor for plotting (big G last), convert units, and compute yield
# as mm of water per 15-min (900 s) interval over the contributing area.
dat_15min <- bind_rows(dat_little, dat_big) %>%
  mutate(
    site_name = factor(
      site_name,
      levels = c("West Brook Lower", "Mitchell Brook", "Jimmy Brook",
                 "Obear Brook Lower", "West Brook Upper", "West Brook Reservoir",
                 "Sanderson Brook", "Avery Brook", "West Whately Brook",
                 "West Brook NWIS")
    ),
    flow_cms = flow*0.02831683199881,   # cfs -> m^3/s
    area_sqkm = area_sqmi*2.58999,      # mi^2 -> km^2
    # m^3/s * 900 s / (km^2 -> m^2) * 1000 mm/m = mm per 15 min
    yield = flow_cms * 900 * (1/(area_sqkm)) * (1/1000000) * 1000
  )
head(dat_15min)
# A tibble: 6 × 7
  site_name   datetime             flow area_sqmi flow_cms area_sqkm  yield
  <fct>       <dttm>              <dbl>     <dbl>    <dbl>     <dbl>  <dbl>
1 Avery Brook 2020-02-20 05:00:00  5.37      2.83    0.152      7.34 0.0187
2 Avery Brook 2020-02-20 05:15:00  5.3       2.83    0.150      7.34 0.0184
3 Avery Brook 2020-02-20 05:30:00  5.17      2.83    0.146      7.34 0.0180
4 Avery Brook 2020-02-20 05:45:00  5.27      2.83    0.149      7.34 0.0183
5 Avery Brook 2020-02-20 06:00:00  5.3       2.83    0.150      7.34 0.0184
6 Avery Brook 2020-02-20 06:15:00  4.94      2.83    0.140      7.34 0.0172

Organize 1-hour data

Code
# Build the hourly series: stack sites, order the site factor (big G last),
# drop missing flows, truncate timestamps to the hour, average flow within
# each site-hour, then convert units and compute yield as mm per hour (3600 s).
dat_1hr <- bind_rows(dat_little, dat_big) %>%
  mutate(
    site_name = factor(
      site_name,
      levels = c("West Brook Lower", "Mitchell Brook", "Jimmy Brook",
                 "Obear Brook Lower", "West Brook Upper", "West Brook Reservoir",
                 "Sanderson Brook", "Avery Brook", "West Whately Brook",
                 "West Brook NWIS")
    )
  ) %>%
  filter(!is.na(flow)) %>%
  mutate(datetime = floor_date(datetime, unit = "hour")) %>%
  group_by(site_name, datetime) %>%
  # unique() errors if a site ever carries two areas — a deliberate guard
  summarise(flow = mean(flow), area_sqmi = unique(area_sqmi)) %>%
  ungroup() %>%
  mutate(
    flow_cms = flow*0.02831683199881,   # cfs -> m^3/s
    area_sqkm = area_sqmi*2.58999,      # mi^2 -> km^2
    yield = flow_cms * 3600 * (1/(area_sqkm)) * (1/1000000) * 1000
  )
head(dat_1hr)
# A tibble: 6 × 7
  site_name        datetime             flow area_sqmi flow_cms area_sqkm  yield
  <fct>            <dttm>              <dbl>     <dbl>    <dbl>     <dbl>  <dbl>
1 West Brook Lower 2020-01-01 05:00:00  9.5       8.51    0.269      22.0 0.0439
2 West Brook Lower 2020-01-01 06:00:00  9.18      8.51    0.260      22.0 0.0425
3 West Brook Lower 2020-01-01 07:00:00  8.94      8.51    0.253      22.0 0.0414
4 West Brook Lower 2020-01-01 08:00:00  8.99      8.51    0.255      22.0 0.0416
5 West Brook Lower 2020-01-01 09:00:00  8.67      8.51    0.245      22.0 0.0401
6 West Brook Lower 2020-01-01 10:00:00  8.56      8.51    0.242      22.0 0.0396

Load daily data

Code
# Daily data: one site vector drives both the filter and the factor levels,
# so the plotting order (big G last) matches the sub-daily datasets.
wb_sites <- c("West Brook Lower", "Mitchell Brook", "Jimmy Brook", "Obear Brook Lower", "West Brook Upper", "West Brook Reservoir", "Sanderson Brook", "Avery Brook", "West Whately Brook", "West Brook NWIS")
dat_1day <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/EcoDrought_FlowTempData_DailyWeekly.csv") %>%
  filter(site_name %in% wb_sites) %>%
  mutate(site_name = factor(site_name, levels = wb_sites))
head(dat_1day)
# A tibble: 6 × 31
  station_no site_name   site_id basin      subbasin  region   lat  long elev_ft
  <chr>      <fct>       <chr>   <chr>      <chr>     <chr>  <dbl> <dbl>   <dbl>
1 01171000   Avery Brook AB      West Brook West Bro… Mass    42.4 -72.7    699.
2 01171000   Avery Brook AB      West Brook West Bro… Mass    42.4 -72.7    699.
3 01171000   Avery Brook AB      West Brook West Bro… Mass    42.4 -72.7    699.
4 01171000   Avery Brook AB      West Brook West Bro… Mass    42.4 -72.7    699.
5 01171000   Avery Brook AB      West Brook West Bro… Mass    42.4 -72.7    699.
6 01171000   Avery Brook AB      West Brook West Bro… Mass    42.4 -72.7    699.
# ℹ 22 more variables: area_sqmi <dbl>, designation <chr>, date <date>,
#   DischargeReliability <dbl>, TempReliability <dbl>, flow_mean <dbl>,
#   flow_min <dbl>, flow_max <dbl>, tempc_mean <dbl>, tempc_min <dbl>,
#   tempc_max <dbl>, flow_mean_filled <dbl>, flow_mean_cms <dbl>,
#   flow_mean_filled_cms <dbl>, area_sqkm <dbl>, Yield_mm <dbl>,
#   Yield_filled_mm <dbl>, flow_mean_7 <dbl>, flow_mean_filled_7 <dbl>,
#   tempc_mean_7 <dbl>, Yield_mm_7 <dbl>, Yield_filled_mm_7 <dbl>

4.1.2 View 15-min data

Plot 15 min time series data

Code
# Interactive 15-min yield series, one column per site. pivot_wider()
# replaces the superseded spread(); names_sort = TRUE keeps columns in
# factor-level order (big G last) and arrange(datetime) reproduces spread()'s
# row sorting, so the color vector (9 Zissou colors + black for big G) lines up.
dat_15min %>% select(datetime, site_name, yield) %>% pivot_wider(names_from = site_name, values_from = yield, names_sort = TRUE) %>% arrange(datetime) %>% dygraph() %>% dyRangeSelector() %>% dyAxis("y", label = "Yield (mm)") %>% dyOptions(colors = c(hcl.colors(9, "Zissou 1"), "black")) %>% dyHighlight() 

4.1.3 View 1-hour data

Plot 1-hour time series data

Code
# Interactive hourly yield series, one column per site. pivot_wider()
# replaces the superseded spread(); names_sort = TRUE preserves the
# factor-level column order so big G ("West Brook NWIS") is last and gets black.
dat_1hr %>% select(datetime, site_name, yield) %>% pivot_wider(names_from = site_name, values_from = yield, names_sort = TRUE) %>% arrange(datetime) %>% dygraph() %>% dyRangeSelector() %>% dyAxis("y", label = "Yield (mm)") %>% dyOptions(colors = c(hcl.colors(9, "Zissou 1"), "black")) %>% dyHighlight() 

4.1.4 View 1-day data

Plot 1-day/daily time series data

Code
# Interactive daily (gap-filled) yield series, one column per site.
# pivot_wider() replaces the superseded spread(); names_sort = TRUE preserves
# the factor-level column order so big G is last and plotted in black.
dat_1day %>% select(date, site_name, Yield_filled_mm) %>% pivot_wider(names_from = site_name, values_from = Yield_filled_mm, names_sort = TRUE) %>% arrange(date) %>% dygraph() %>% dyRangeSelector() %>% dyAxis("y", label = "Yield (mm)") %>% dyOptions(colors = c(hcl.colors(9, "Zissou 1"), "black")) %>% dyHighlight() 

4.2 Hourly data

Set baseflow separation (for the Lyne-Hollick one-parameter digital recursive filter) and event delineation parameters (as in Wasko and Guo, 2022)

  • alp: alpha filter parameter, higher values “lower” the estimated baseflow (thus making it difficult to delineate events)
  • numpass: number of passes. Ladson et al. (2013) recommend 3 passes for daily data (default in baseflowB() function) and 9 passes for hourly data
  • thresh: baseflow index threshold for event delineation, higher threshold values make it “easier” to delineate events

4.2.1 Event pairing

Conduct event pairing using hydroEvents package to understand lag time between peak flows at big and little g’s

4.2.1.1 Conduct event pairing

Code
# baseflow separation and event delineation parameters
alp <- 0.925    # Lyne-Hollick filter parameter
numpass <- 9    # 9 passes recommended for hourly data (Ladson et al. 2013)
thresh <- 0.5   # BFI threshold for event delineation

# little-g sites (big G, "West Brook NWIS", is the 10th factor level)
sites <- unique(dat_1hr$site_name)[1:9]

# preallocated list to store per-site results
outlist <- vector("list", length(sites))

for (j in seq_along(sites)) {
  # grab big and little g data and combine into a single tibble
  littleg <- dat_1hr %>% filter(site_name == sites[j])
  bigg <- dat_1hr %>% filter(site_name == "West Brook NWIS")
  mytib <- bigg %>% 
    select(datetime, yield) %>% 
    rename(yield_big = yield) %>% 
    left_join(littleg %>% select(datetime, yield) %>% rename(yield_little = yield))
  
  # baseflow separation: run the filter once per series (not once per output
  # column) and attach both the baseflow and BFI components
  mytib_bf <- mytib %>% filter(!is.na(yield_big), !is.na(yield_little))
  bfsep_big <- baseflowB(mytib_bf$yield_big, alpha = alp, passes = numpass)
  bfsep_little <- baseflowB(mytib_bf$yield_little, alpha = alp, passes = numpass)
  mytib_bf <- mytib_bf %>% 
    mutate(bf_big = bfsep_big$bf, bfi_big = bfsep_big$bfi,
           bf_little = bfsep_little$bf, bfi_little = bfsep_little$bfi)
  
  # delineate events
  events_little <- eventBaseflow(mytib_bf$yield_little, BFI_Th = thresh, bfi = mytib_bf$bfi_little)
  events_big <- eventBaseflow(mytib_bf$yield_big, BFI_Th = thresh, bfi = mytib_bf$bfi_big)
  
  # event matching; keep only fully matched pairs
  mypairs <- pairEvents(events_little, events_big, lag = 5, type = 1)
  mypairs_com <- mypairs[complete.cases(mypairs),]
  
  # get matched event info: vectorized match() lookups replace the per-row
  # loop of logical comparisons (event start index is the join key)
  idx_little <- match(mypairs_com$srt, events_little$srt)
  idx_big <- match(mypairs_com$matched.srt, events_big$srt)
  outlist[[j]] <- tibble(
    index_little = events_little$which.max[idx_little],
    index_big = events_big$which.max[idx_big],
    datetime_little = mytib_bf$datetime[events_little$which.max[idx_little]],
    datetime_big = mytib_bf$datetime[events_big$which.max[idx_big]],
    yield_little = events_little$max[idx_little],
    yield_big = events_big$max[idx_big]
  ) %>% 
    mutate(timediff_hrs = as.numeric(difftime(datetime_big, datetime_little), units = "hours"),
           site_name = sites[j])
}
matchpeaktib <- bind_rows(outlist)
(matchpeaktib)
# A tibble: 524 × 8
   index_little index_big datetime_little     datetime_big        yield_little
          <int>     <int> <dttm>              <dttm>                     <dbl>
 1          176       177 2020-02-07 22:00:00 2020-02-07 23:00:00        0.150
 2          505       508 2020-02-21 15:00:00 2020-02-21 18:00:00        0.111
 3          648       650 2020-02-27 14:00:00 2020-02-27 16:00:00        0.432
 4         1012      1013 2020-03-13 18:00:00 2020-03-13 19:00:00        0.190
 5         1153      1154 2020-03-19 15:00:00 2020-03-19 16:00:00        0.191
 6         1406      1407 2020-03-30 04:00:00 2020-03-30 05:00:00        0.327
 7         1664      1665 2020-04-09 22:00:00 2020-04-09 23:00:00        0.204
 8         1762      1764 2020-04-14 00:00:00 2020-04-14 02:00:00        0.419
 9         2166      2168 2020-04-30 20:00:00 2020-04-30 22:00:00        0.214
10         2194      2194 2020-05-02 00:00:00 2020-05-02 00:00:00        0.811
# ℹ 514 more rows
# ℹ 3 more variables: yield_big <dbl>, timediff_hrs <dbl>, site_name <fct>

4.2.1.2 Plot output

Distribution of lags

Constrain lag times to realistic values (>= -5 and <= 10 hours), as event pairing is not perfect, and view histograms by site

Code
# lag-time histograms by site; the [-5, 10] h window trims mis-paired events
matchpeaktib %>% filter(timediff_hrs >= -5 & timediff_hrs <= 10) %>% ggplot() + geom_histogram(aes(x = timediff_hrs)) + facet_wrap(~site_name)

View distributions summarized as boxplots. Sites are ordered from closest to Big G (bottom) to furthest (top). Interestingly, there is not a strong pattern of longer lag times for further sites.

Code
# one lag per big-G event per site (min collapses duplicate pairings of the
# same big-G event), summarized as boxplots; window [-5, 10] h as above
# matchpeaktib %>% filter(timediff_hrs >= -10 & timediff_hrs <= 20) %>% ggplot() + geom_boxplot(aes(x = site_name, y = timediff_hrs)) + coord_flip()
matchpeaktib %>% filter(timediff_hrs >= -5 & timediff_hrs <= 10) %>% mutate(bigevcd = as.numeric(datetime_big)) %>% group_by(bigevcd, site_name) %>% summarize(timediff_hrs = min(timediff_hrs)) %>% ggplot() + geom_boxplot(aes(x = site_name, y = timediff_hrs)) + coord_flip()

Connect specific events across sites.

  • Is there consistency in lag time across sites? Generally, yes
  • is there a pattern of longer lag times with increasing distance upstream? No
Code
# connect each big-G event (one colored line) across sites to show whether a
# given event's lag is consistent from site to site; jitter separates ties
matchpeaktib %>% filter(timediff_hrs >= -5 & timediff_hrs <= 10) %>% mutate(bigevcd = as.factor(as.numeric(datetime_big))) %>% group_by(bigevcd, site_name) %>% summarize(timediff_hrs = min(timediff_hrs)) %>% ggplot(aes(x = site_name, y = jitter(timediff_hrs), group = bigevcd, color = bigevcd)) + geom_line() + coord_flip() + theme(legend.position = "none")

Lag by yield

Does lag time depend on the magnitude of yield at big G? Under high flows when water is moving more quickly, we might expect the lag to be shorter (negative relationship).

View global relationship

Code
# lag time vs. big-G event yield, all sites pooled (loess smooth)
matchpeaktib %>% filter(timediff_hrs >= -5 & timediff_hrs <= 10) %>% ggplot(aes(x = yield_big, y = jitter(timediff_hrs))) + geom_point() + geom_smooth()

View by site

Code
# lag time vs. big-G event yield, faceted by site
matchpeaktib %>% filter(timediff_hrs >= -5 & timediff_hrs <= 10) %>% ggplot(aes(x = yield_big, y = jitter(timediff_hrs))) + geom_point() + geom_smooth() + facet_wrap(~ site_name)

There appears to be some evidence for shorter lag times with increasing flow, but this relationship is only evident for very low flows. What is more apparent is the overall attenuation in variability in lag time as flows increase: at very low flows, lags are highly variable, but less variable (and intermediate in magnitude) under high flows.

Lag by time

Does lag time change over time? Perhaps lag time is seasonal and changes with antecedent conditions. Note that this is not the best way to get at the question of the importance of antecedent conditions.

View global relationship

Code
# lag time vs. day of year (of the little-g peak), all sites pooled
matchpeaktib %>% filter(timediff_hrs >= -5 & timediff_hrs <= 10) %>% mutate(doy = yday(datetime_little)) %>% ggplot(aes(x = doy, y = jitter(timediff_hrs))) + geom_point() + geom_smooth()

View by site

Code
# lag time vs. day of year, faceted by site
matchpeaktib %>% filter(timediff_hrs >= -5 & timediff_hrs <= 10) %>% mutate(doy = yday(datetime_little)) %>% ggplot(aes(x = doy, y = jitter(timediff_hrs))) + geom_point() + geom_smooth() + facet_wrap(~ site_name)

There does not appear to be any consistent pattern (within or among sites) in how lag times change with time of year

4.2.2 Gg pseudo-analysis

4.2.2.1 Prepare data

Specify big and little g data

Code
# Big G = downstream NWIS gage; little g = the nine headwater/tributary sites
dat_big <- dat_1hr %>% filter(site_name %in% c("West Brook NWIS"))
dat_little <- dat_1hr %>% filter(site_name %in% c("West Brook Lower", "Mitchell Brook", "Jimmy Brook", "Obear Brook Lower", "West Brook Upper", "West Brook Reservoir", "Sanderson Brook", "Avery Brook", "West Whately Brook"))

Conduct baseflow separation and event delineation on big g data

Code
# baseflow separation: filter once, run the filter once, attach both outputs
dat_big <- dat_big %>% filter(!is.na(yield))
bfsep <- baseflowB(dat_big$yield, alpha = 0.925, passes = 9)
dat_big <- dat_big %>% mutate(bf = bfsep$bf, bfi = bfsep$bfi)

# event delineation (len = event length in hourly time steps)
events <- eventBaseflow(dat_big$yield, BFI_Th = 0.75, bfi = dat_big$bfi)
events <- events %>% mutate(len = end - srt + 1)

# define positions of non-events: the gaps before, between, and after events.
# Built vectorized (no grown vectors); the trailing gap now passes through the
# same end >= srt filter, fixing a latent bug where an event ending on the
# final row produced an invalid non-event row (srt = nrow + 1 > end).
n_ev <- nrow(events)
nonevents <- data.frame(
  tibble(srt = c(1, events$end[-n_ev] + 1, events$end[n_ev] + 1),
         end = c(events$srt - 1, dim(dat_big)[1])) %>%
    filter(end >= srt)
)

# create vectors of binary event (1) / non-event (2) flags and event IDs
isevent_vec <- rep(2, times = dim(dat_big)[1])
eventid_vec <- rep(NA_integer_, times = dim(dat_big)[1])
for (i in seq_len(nrow(events))) {
  rows <- events$srt[i]:events$end[i]
  isevent_vec[rows] <- 1
  eventid_vec[rows] <- i
}

# create vector of non-event IDs
noneventid_vec <- rep(NA_integer_, times = dim(dat_big)[1])
for (i in seq_len(nrow(nonevents))) {
  noneventid_vec[nonevents$srt[i]:nonevents$end[i]] <- i
}

# "agnostic events": hydro events and non-events interleaved in time order
agnevents <- rbind(events %>% select(srt, end) %>% mutate(event = 1),
                   nonevents %>% mutate(event = 0)) %>% arrange((srt))
agneventid_vec <- rep(NA_integer_, times = dim(dat_big)[1])
for (i in seq_len(nrow(agnevents))) {
  agneventid_vec[agnevents$srt[i]:agnevents$end[i]] <- i
}

# add event/non-event vectors to Big G data; split yield into event and
# non-event columns and compute quickflow (event yield minus baseflow)
dat_big <- dat_big %>% 
  mutate(isevent = isevent_vec, 
         eventid = eventid_vec,
         noneventid = noneventid_vec,
         agneventid = agneventid_vec,
         big_event_yield = ifelse(isevent_vec == 1, yield, NA),
         big_nonevent_yield = ifelse(isevent_vec == 2, yield, NA),
         big_event_quick = big_event_yield - bf) %>%
  rename(big_yield = yield, big_bf = bf, big_bfi = bfi)
dat_big
# A tibble: 42,233 × 16
   site_name    datetime             flow area_sqmi flow_cms area_sqkm big_yield
   <fct>        <dttm>              <dbl>     <dbl>    <dbl>     <dbl>     <dbl>
 1 West Brook … 2020-01-31 15:00:00  23.2      11.4    0.657      29.5    0.0801
 2 West Brook … 2020-01-31 16:00:00  26.8      11.4    0.759      29.5    0.0926
 3 West Brook … 2020-01-31 17:00:00  23.3      11.4    0.660      29.5    0.0805
 4 West Brook … 2020-01-31 18:00:00  18.9      11.4    0.534      29.5    0.0652
 5 West Brook … 2020-01-31 19:00:00  18.7      11.4    0.530      29.5    0.0647
 6 West Brook … 2020-01-31 20:00:00  19.0      11.4    0.539      29.5    0.0658
 7 West Brook … 2020-01-31 21:00:00  18.9      11.4    0.534      29.5    0.0652
 8 West Brook … 2020-01-31 22:00:00  19.0      11.4    0.539      29.5    0.0658
 9 West Brook … 2020-01-31 23:00:00  19.4      11.4    0.549      29.5    0.0669
10 West Brook … 2020-02-01 00:00:00  19.0      11.4    0.539      29.5    0.0658
# ℹ 42,223 more rows
# ℹ 9 more variables: big_bf <dbl>, big_bfi <dbl>, isevent <dbl>,
#   eventid <int>, noneventid <int>, agneventid <int>, big_event_yield <dbl>,
#   big_nonevent_yield <dbl>, big_event_quick <dbl>
Code
# plot: hourly big-G yield with separated baseflow and the event vs.
# non-event classification overlaid
dat_big %>% select(datetime, big_yield, big_bf, big_event_yield, big_nonevent_yield) %>% dygraph() %>% dyRangeSelector() %>% dyAxis("y", label = "Hourly yield (mm)") %>% dyOptions(fillGraph = TRUE, drawGrid = FALSE, axisLineWidth = 1.5)

A few things to note:

  • Many delineated events are <24 hours in length
  • Much of the natural diel variation in streamflow (“stream breathing” due to diel cycle of ET) ends up being delineated as individual events
    • When viewed at this time-scale, there also seems to be variation in terms of whether or not the “stream breathing” exists, which could be due to changes in how the data were processed and subsequently smoothed (or not)
  • WMA interpolation becomes an issue at the sub-daily time scale. I.e., the time scale of the data changes during observed (15-min) and interpolated (4-hour) periods

Combine big and little g data

Code
# Summarize little-g and big-G yield within each "agnostic event" (event or
# non-event period defined at big G): one row per site x period, with
# cumulative and mean log-yield at both gages.
dat_wb2 <- dat_little %>% 
  filter(datetime >= min(dat_big$datetime) & datetime <= max(dat_big$datetime)) %>%
  left_join(dat_big %>% select(datetime, big_yield, big_bf, big_bfi, agneventid, isevent)) %>%
  group_by(site_name, agneventid) %>% 
  # NOTE(review): 0.01 is added to every hourly value before summing/logging
  # (presumably to avoid log(0)); the cumulative offset therefore scales with
  # period length (eventlen * 0.01) — confirm this is intended
  summarise(eventlen = n(),
            mindate = min(datetime),
            isevent = unique(isevent), 
            yield_little_cumul = sum(yield+0.01),
            yield_big_cumul = sum(big_yield+0.01),
            yield_little_cumul_log = log(yield_little_cumul),
            yield_big_cumul_log = log(yield_big_cumul),
            yield_little_mean_log = mean(log(yield+0.01)),
            yield_big_mean_log = mean(log(big_yield+0.01))) %>%
  ungroup() %>%
  filter(!is.na(isevent)) %>%
  mutate(site_name = factor(site_name, levels = c("West Brook Lower", "Mitchell Brook", "Jimmy Brook", "Obear Brook Lower", "West Brook Upper", "West Brook Reservoir", "Sanderson Brook", "Avery Brook", "West Whately Brook")),
         site_name_cd = as.numeric(site_name))
dat_wb2
# A tibble: 3,233 × 12
   site_name  agneventid eventlen mindate             isevent yield_little_cumul
   <fct>           <int>    <int> <dttm>                <dbl>              <dbl>
 1 West Broo…          1      168 2020-01-31 15:00:00       2            12.4   
 2 West Broo…          2       29 2020-02-07 15:00:00       1             3.33  
 3 West Broo…          3       19 2020-02-08 20:00:00       2             1.50  
 4 West Broo…          4        6 2020-02-09 15:00:00       1             0.408 
 5 West Broo…          5       90 2020-02-09 21:00:00       2             6.92  
 6 West Broo…          6       25 2020-02-13 15:00:00       1             2.74  
 7 West Broo…          7      104 2020-02-14 16:00:00       2             9.13  
 8 West Broo…          8       36 2020-02-19 00:00:00       1             3.18  
 9 West Broo…          9        1 2020-02-20 12:00:00       2             0.0641
10 West Broo…         10        5 2020-02-20 13:00:00       1             0.305 
# ℹ 3,223 more rows
# ℹ 6 more variables: yield_big_cumul <dbl>, yield_little_cumul_log <dbl>,
#   yield_big_cumul_log <dbl>, yield_little_mean_log <dbl>,
#   yield_big_mean_log <dbl>, site_name_cd <dbl>

4.2.2.2 Event-level distribution

Find ~long events/non-events

Code
# Rank "agnostic events" by length: summarize each period once, then split
# into events (isevent == 1) and baseflow periods (isevent == 2)
agn_summary <- dat_big %>% 
  group_by(agneventid) %>% 
  summarize(eventlen = n(), isevent = unique(isevent), 
            mindate = min(date(datetime)), maxdate = max(date(datetime)))
agntib1 <- agn_summary %>% filter(isevent == 1) %>% arrange(desc(eventlen))
agntib2 <- agn_summary %>% filter(isevent == 2) %>% arrange(desc(eventlen))
agntib1
# A tibble: 560 × 5
   agneventid eventlen isevent mindate    maxdate   
        <int>    <int>   <dbl> <date>     <date>    
 1        888      169       1 2024-06-19 2024-06-26
 2        159      138       1 2020-09-29 2020-10-05
 3        917      127       1 2024-08-09 2024-08-15
 4        330      126       1 2021-07-17 2021-07-23
 5        308      122       1 2021-05-29 2021-06-03
 6        961      122       1 2024-12-09 2024-12-14
 7        348      121       1 2021-08-22 2021-08-27
 8        919      117       1 2024-08-18 2024-08-23
 9        322      108       1 2021-06-30 2021-07-05
10        729      108       1 2023-07-02 2023-07-06
# ℹ 550 more rows
Code
# longest baseflow (non-event) periods
agntib2
# A tibble: 408 × 5
   agneventid eventlen isevent mindate    maxdate   
        <int>    <int>   <dbl> <date>     <date>    
 1        380      415       2 2021-11-15 2021-12-03
 2        924      393       2 2024-09-09 2024-09-26
 3        920      366       2 2024-08-23 2024-09-07
 4        682      360       2 2023-04-02 2023-04-17
 5        152      355       2 2020-09-13 2020-09-28
 6        594      353       2 2022-10-27 2022-11-10
 7        826      345       2 2024-01-30 2024-02-13
 8        794      342       2 2023-11-08 2023-11-22
 9        278      327       2 2021-04-02 2021-04-15
10        645      290       2 2023-02-11 2023-02-23
# ℹ 398 more rows

Plot exceedance curves for selected events (top row) and baseflow periods (bottom row)

Code
# NOTE(review): p1 and p2 are built in a chunk not shown here — presumably the
# exceedance curves for selected events (p1) and baseflow periods (p2)
ggarrange(p1, p2, nrow = 2)

4.2.2.3 Plot output

gG relationship with data summarized as cumulative yield per event/non-event:

Code
# gG relationship, cumulative yield per period: log cumulative yield at each
# little g vs. big G, per-site linear fits, dashed 1:1 reference line,
# faceted by period type. se = FALSE (never the reassignable F shorthand).
dat_wb2 %>% 
  mutate(isevent = dplyr::recode(isevent, "1" = "Event", "2" = "Baseflow")) %>% 
  ggplot(aes(x = yield_big_cumul_log, y = yield_little_cumul_log, group = site_name, color = site_name)) + 
  geom_point(alpha = 0.25) + 
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") + 
  geom_smooth(method = "lm", se = FALSE) + facet_wrap(~isevent)

Effect of (log) cumulative yield at Big G on (log) cumulative yield at little g during baseflow and event periods.

Facet by site

Code
# Same cumulative-yield gG relationship, faceted by site (legend suppressed).
# se = FALSE replaces the reassignable F shorthand.
dat_wb2 %>% 
  mutate(isevent = dplyr::recode(isevent, "1" = "Event", "2" = "Baseflow")) %>% 
  ggplot(aes(x = yield_big_cumul_log, y = yield_little_cumul_log, group = site_name, color = site_name)) + 
  geom_point(alpha = 0.25) + 
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") + 
  geom_smooth(method = "lm", se = FALSE) + facet_wrap(~site_name) +
  theme(legend.position = "none")

Effect of (log) cumulative yield at Big G on (log) cumulative yield at little g during baseflow and event periods, faceted by site.

Gg relationship with data summarized as mean yield per event/non-event

Code
# gG relationship, mean yield per period: log mean yield at each little g vs.
# big G, per-site linear fits, dashed 1:1 line, faceted by period type.
# se = FALSE replaces the reassignable F shorthand.
dat_wb2 %>% 
  mutate(isevent = dplyr::recode(isevent, "1" = "Event", "2" = "Baseflow")) %>% 
  ggplot(aes(x = yield_big_mean_log, y = yield_little_mean_log, group = site_name, color = site_name)) + 
  geom_point(alpha = 0.25) + 
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") + 
  geom_smooth(method = "lm", se = FALSE) + facet_wrap(~isevent)

Effect of (log) mean yield at Big G on (log) mean yield at little g during baseflow and event periods.

Facet by site

Code
# Same mean-yield gG relationship, faceted by site (legend suppressed).
# se = FALSE replaces the reassignable F shorthand.
dat_wb2 %>% 
  mutate(isevent = dplyr::recode(isevent, "1" = "Event", "2" = "Baseflow")) %>% 
  ggplot(aes(x = yield_big_mean_log, y = yield_little_mean_log, group = site_name, color = site_name)) + 
  geom_point(alpha = 0.25) + 
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") + 
  geom_smooth(method = "lm", se = FALSE) + facet_wrap(~site_name) +
  theme(legend.position = "none")

Effect of (log) mean yield at Big G on (log) mean yield at little g during baseflow and event periods, faceted by site.

4.3 Daily data

4.3.1 Event pairing

4.3.1.1 Conduct event pairing

Code
# baseflow separation and event delineation parameters
alp <- 0.925    # Lyne-Hollick filter parameter
numpass <- 3    # 3 passes recommended for daily data (Ladson et al. 2013)
thresh <- 0.5   # BFI threshold for event delineation

# little-g sites (big G, "West Brook NWIS", is the 10th factor level)
sites <- unique(dat_1day$site_name)[1:9]

# preallocated list to store per-site results
outlist <- vector("list", length(sites))

for (j in seq_along(sites)) {
  # grab big and little g data and combine into a single tibble
  littleg <- dat_1day %>% filter(site_name == sites[j])
  bigg <- dat_1day %>% filter(site_name == "West Brook NWIS")
  mytib <- bigg %>% 
    select(date, Yield_filled_mm) %>% 
    rename(yield_big = Yield_filled_mm) %>% 
    left_join(littleg %>% select(date, Yield_filled_mm) %>% rename(yield_little = Yield_filled_mm))
  
  # baseflow separation: run the filter once per series (not once per column)
  mytib_bf <- mytib %>% filter(!is.na(yield_big), !is.na(yield_little))
  bfsep_big <- baseflowB(mytib_bf$yield_big, alpha = alp, passes = numpass)
  bfsep_little <- baseflowB(mytib_bf$yield_little, alpha = alp, passes = numpass)
  mytib_bf <- mytib_bf %>% 
    mutate(bf_big = bfsep_big$bf, bfi_big = bfsep_big$bfi,
           bf_little = bfsep_little$bf, bfi_little = bfsep_little$bfi)
  
  # delineate events
  events_little <- eventBaseflow(mytib_bf$yield_little, BFI_Th = thresh, bfi = mytib_bf$bfi_little)
  events_big <- eventBaseflow(mytib_bf$yield_big, BFI_Th = thresh, bfi = mytib_bf$bfi_big)
  
  # event matching; keep only fully matched pairs
  mypairs <- pairEvents(events_little, events_big, lag = 5, type = 1)
  mypairs_com <- mypairs[complete.cases(mypairs),]
  
  # get matched event info: vectorized match() lookups replace the per-row
  # loop of logical comparisons (event start index is the join key)
  idx_little <- match(mypairs_com$srt, events_little$srt)
  idx_big <- match(mypairs_com$matched.srt, events_big$srt)
  outlist[[j]] <- tibble(
    datetime_little = mytib_bf$date[events_little$which.max[idx_little]],
    datetime_big = mytib_bf$date[events_big$which.max[idx_big]],
    yield_little = events_little$max[idx_little],
    yield_big = events_big$max[idx_big]
  ) %>% 
    mutate(timediff_dys = as.numeric(difftime(datetime_big, datetime_little), units = "days"),
           site_name = sites[j])
}
matchpeaktib <- bind_rows(outlist)
(matchpeaktib)
# A tibble: 412 × 6
   datetime_little datetime_big yield_little yield_big timediff_dys site_name  
   <date>          <date>              <dbl>     <dbl>        <dbl> <fct>      
 1 2020-02-27      2020-02-27          9.61      5.56             0 Avery Brook
 2 2020-03-30      2020-03-30          8.65      6.56             0 Avery Brook
 3 2020-04-14      2020-04-14          8.18      7.40             0 Avery Brook
 4 2020-05-01      2020-05-02         16.9      13.5              1 Avery Brook
 5 2020-05-16      2020-05-16          2.84      1.99             0 Avery Brook
 6 2020-06-12      2020-06-12          0.872     0.427            0 Avery Brook
 7 2020-07-04      2020-07-04          1.10      0.576            0 Avery Brook
 8 2020-07-10      2020-07-10          4.76      0.905            0 Avery Brook
 9 2020-07-23      2020-07-23          0.676     1.17             0 Avery Brook
10 2020-08-05      2020-08-05          0.801     0.243            0 Avery Brook
# ℹ 402 more rows

4.3.1.2 Plot output

Distribution of lags

Constrain lag times to realistic values (>= 0 and <= 1 day), as event pairing is not perfect, and view histograms by site

Code
# lag-time histograms by site; the [0, 1] day window trims mis-paired events
matchpeaktib %>% filter(timediff_dys >= 0 & timediff_dys <= 1) %>% ggplot() + geom_histogram(aes(x = timediff_dys)) + facet_wrap(~site_name)

View distributions summarized as means. Sites are ordered from closest to Big G (bottom) to furthest (top). There is a general pattern of longer lag times for sites further from Big G.

Code
# mean daily lag per site (error bars left disabled in the original)
matchpeaktib %>% filter(timediff_dys >= 0 & timediff_dys <= 1) %>% group_by(site_name) %>% summarize(diffmean = mean(timediff_dys), diffsd = sd(timediff_dys)) %>% ggplot() + 
  geom_point(aes(x = site_name, y = diffmean)) + 
  #geom_errorbar(aes(x = site_name, ymin = diffmean - diffsd, ymax = diffmean + diffsd)) + 
  coord_flip()

Lag by yield

Does lag time depend on the magnitude of yield at big G? Under high flows when water is moving more quickly, we might expect the lag to be shorter (negative relationship).

View global relationship

Code
# daily lag time vs. big-G event yield, all sites pooled (loess smooth)
matchpeaktib %>% filter(timediff_dys >= 0 & timediff_dys <= 1) %>% ggplot(aes(x = yield_big, y = jitter(timediff_dys))) + geom_point() + geom_smooth()

View by site

Code
# daily lag time vs. big-G event yield, faceted by site
matchpeaktib %>% filter(timediff_dys >= 0 & timediff_dys <= 1)  %>% ggplot(aes(x = yield_big, y = jitter(timediff_dys))) + geom_point() + geom_smooth() + facet_wrap(~ site_name)

As with the hourly data, there appears to be some evidence for shorter lag times with increasing flow, but this relationship is only evident for very low flows. We note, however, that 1-day lags are relatively uncommon (16% of all observations).

Lag by time

Does lag time change over time? Perhaps lag time is seasonal and changes with antecedent conditions. Note that this is not the best way to get at the question of the importance of antecedent conditions.

View global relationship

Code
# daily lag time vs. day of year (of the little-g peak), all sites pooled
matchpeaktib %>% filter(timediff_dys >= 0 & timediff_dys <= 1) %>% mutate(doy = yday(datetime_little)) %>% ggplot(aes(x = doy, y = jitter(timediff_dys))) + geom_point() + geom_smooth()

View by site

Code
# daily lag time vs. day of year, faceted by site
matchpeaktib %>% filter(timediff_dys >= 0 & timediff_dys <= 1) %>% mutate(doy = yday(datetime_little)) %>% ggplot(aes(x = doy, y = (timediff_dys))) + geom_point() + geom_smooth() + facet_wrap(~ site_name)

4.3.2 Gg pseudo-analysis

4.3.2.1 Prepare data

Specify big and little g data

Code
# Big G = downstream NWIS gage; little g = the nine headwater/tributary sites.
# Gap-filled daily yield is renamed to "yield" so the code below mirrors the
# hourly analysis.
dat_big <- dat_1day %>% filter(site_name %in% c("West Brook NWIS")) %>% rename(yield = Yield_filled_mm)
dat_little <- dat_1day %>% filter(site_name %in% c("West Brook Lower", "Mitchell Brook", "Jimmy Brook", "Obear Brook Lower", "West Brook Upper", "West Brook Reservoir", "Sanderson Brook", "Avery Brook", "West Whately Brook")) %>% rename(yield = Yield_filled_mm)

Conduct baseflow separation and event delineation on big g data

Code
# baseflow separation
# Baseflow separation: run the filter once and reuse both components
# (the original called baseflowB() twice on identical input)
dat_big <- dat_big %>% filter(!is.na(yield))
bf_out <- baseflowB(dat_big$yield, alpha = 0.925, passes = 3)
dat_big <- dat_big %>% mutate(bf = bf_out$bf, bfi = bf_out$bfi)

# Event delineation on the Big G series
events <- eventBaseflow(dat_big$yield, BFI_Th = 0.75, bfi = dat_big$bfi)
events <- events %>% mutate(len = end - srt + 1)

# Positions of non-events: the gaps between consecutive events
# (vectorized; replaces the growing-vector loop and also behaves
# correctly when there is only one event)
n_events <- nrow(events)
srt <- c(1, events$end[-n_events] + 1)
end <- events$srt - 1
nonevents <- data.frame(
  tibble(srt, end) %>%
    mutate(len = end - srt) %>%
    filter(len >= 0) %>%           # drop zero-width gaps (adjacent events)
    select(-len) %>%
    add_row(srt = events$end[n_events] + 1, end = nrow(dat_big))
)

# Binary event/non-event flag (1 = event, 2 = non-event) and event IDs
isevent_vec <- rep(2, times = nrow(dat_big))
eventid_vec <- rep(NA, times = nrow(dat_big))
for (i in seq_len(n_events)) {
  isevent_vec[events$srt[i]:events$end[i]] <- 1
  eventid_vec[events$srt[i]:events$end[i]] <- i
}

# Non-event IDs (preallocated)
noneventid_vec <- rep(NA, times = nrow(dat_big))
for (i in seq_len(nrow(nonevents))) {
  noneventid_vec[nonevents$srt[i]:nonevents$end[i]] <- i
}

# "Agnostic events": events and non-events interleaved in time order,
# numbered consecutively
agnevents <- rbind(events %>% select(srt, end) %>% mutate(event = 1),
                   nonevents %>% mutate(event = 0)) %>%
  arrange(srt)
agneventid_vec <- rep(NA, times = nrow(dat_big))
for (i in seq_len(nrow(agnevents))) {
  agneventid_vec[agnevents$srt[i]:agnevents$end[i]] <- i
}

# Attach event/non-event structure to the Big G data
dat_big <- dat_big %>% 
  mutate(isevent = isevent_vec, 
         eventid = eventid_vec,
         noneventid = noneventid_vec,
         agneventid = agneventid_vec,
         big_event_yield = ifelse(isevent_vec == 1, yield, NA),
         big_nonevent_yield = ifelse(isevent_vec == 2, yield, NA),
         big_event_quick = big_event_yield - bf) %>%
  rename(big_yield = yield, big_bf = bf, big_bfi = bfi)
dat_big
# A tibble: 1,411 × 40
   station_no site_name       site_id basin  subbasin region   lat  long elev_ft
   <chr>      <fct>           <chr>   <chr>  <chr>    <chr>  <dbl> <dbl>   <dbl>
 1 01171100   West Brook NWIS WBR     West … West Br… Mass    42.4 -72.6    154.
 2 01171100   West Brook NWIS WBR     West … West Br… Mass    42.4 -72.6    154.
 3 01171100   West Brook NWIS WBR     West … West Br… Mass    42.4 -72.6    154.
 4 01171100   West Brook NWIS WBR     West … West Br… Mass    42.4 -72.6    154.
 5 01171100   West Brook NWIS WBR     West … West Br… Mass    42.4 -72.6    154.
 6 01171100   West Brook NWIS WBR     West … West Br… Mass    42.4 -72.6    154.
 7 01171100   West Brook NWIS WBR     West … West Br… Mass    42.4 -72.6    154.
 8 01171100   West Brook NWIS WBR     West … West Br… Mass    42.4 -72.6    154.
 9 01171100   West Brook NWIS WBR     West … West Br… Mass    42.4 -72.6    154.
10 01171100   West Brook NWIS WBR     West … West Br… Mass    42.4 -72.6    154.
# ℹ 1,401 more rows
# ℹ 31 more variables: area_sqmi <dbl>, designation <chr>, date <date>,
#   DischargeReliability <dbl>, TempReliability <dbl>, flow_mean <dbl>,
#   flow_min <dbl>, flow_max <dbl>, tempc_mean <dbl>, tempc_min <dbl>,
#   tempc_max <dbl>, flow_mean_filled <dbl>, flow_mean_cms <dbl>,
#   flow_mean_filled_cms <dbl>, area_sqkm <dbl>, Yield_mm <dbl>,
#   big_yield <dbl>, flow_mean_7 <dbl>, flow_mean_filled_7 <dbl>, …
Code
# plot
# Interactive plot of the Big G daily series. Axis label corrected from
# "Hourly yield (mm)": this section works with the daily (dat_1day) data,
# so the original label was a copy-paste from the hourly section.
dat_big %>% select(date, big_yield, big_bf, big_event_yield, big_nonevent_yield) %>% dygraph() %>% dyRangeSelector() %>% dyAxis("y", label = "Daily yield (mm)") %>% dyOptions(fillGraph = TRUE, drawGrid = FALSE, axisLineWidth = 1.5)

Combine big and little g data

Code
# Combine big and little g daily data, then summarise yield per agnostic
# event/non-event window. The join key is spelled out rather than relying
# on left_join()'s automatic common-column detection.
dat_wb2 <- dat_little %>% 
  filter(date >= min(dat_big$date) & date <= max(dat_big$date)) %>%
  left_join(dat_big %>% select(date, big_yield, big_bf, big_bfi, agneventid, isevent),
            by = "date") %>%
  group_by(site_name, agneventid) %>% 
  summarise(eventlen = n(),
            mindate = min(date),
            isevent = unique(isevent), 
            # +0.01 guards against log(0); note it is added per daily
            # observation before summing, so the cumulative offset grows
            # with event length — presumably intentional, verify if reused
            yield_little_cumul = sum(yield+0.01),
            yield_big_cumul = sum(big_yield+0.01),
            yield_little_cumul_log = log(yield_little_cumul),
            yield_big_cumul_log = log(yield_big_cumul),
            yield_little_mean_log = mean(log(yield+0.01)),
            yield_big_mean_log = mean(log(big_yield+0.01))) %>%
  ungroup() %>%
  filter(!is.na(isevent)) %>%
  mutate(site_name = factor(site_name, levels = c("West Brook Lower", "Mitchell Brook", "Jimmy Brook", "Obear Brook Lower", "West Brook Upper", "West Brook Reservoir", "Sanderson Brook", "Avery Brook", "West Whately Brook")),
         site_name_cd = as.numeric(site_name))
dat_wb2
# A tibble: 714 × 12
   site_name        agneventid eventlen mindate    isevent yield_little_cumul
   <fct>                 <int>    <int> <date>       <dbl>              <dbl>
 1 West Brook Lower          1        6 2020-01-31       2               9.18
 2 West Brook Lower          2        4 2020-02-06       1               7.51
 3 West Brook Lower          3        3 2020-02-10       2               4.74
 4 West Brook Lower          4        3 2020-02-13       1               6.97
 5 West Brook Lower          5       10 2020-02-16       2              15.3 
 6 West Brook Lower          6        6 2020-02-26       1              18.5 
 7 West Brook Lower          7        4 2020-03-03       1              10.6 
 8 West Brook Lower          8        5 2020-03-07       2              10.4 
 9 West Brook Lower          9        4 2020-03-12       1              11.3 
10 West Brook Lower         10        2 2020-03-16       2               4.68
# ℹ 704 more rows
# ℹ 6 more variables: yield_big_cumul <dbl>, yield_little_cumul_log <dbl>,
#   yield_big_cumul_log <dbl>, yield_little_mean_log <dbl>,
#   yield_big_mean_log <dbl>, site_name_cd <dbl>

4.3.2.2 Plot output

gG relationship with data summarized as cumulative yield per event/non-event:

Code
# gG relationship, cumulative (log) yield, faceted by event vs baseflow.
# se = FALSE spelled out (T/F are reassignable shorthands in R).
dat_wb2 %>% 
  mutate(isevent = dplyr::recode(isevent, "1" = "Event", "2" = "Baseflow")) %>% 
  ggplot(aes(x = yield_big_cumul_log, y = yield_little_cumul_log, group = site_name, color = site_name)) + 
  geom_point(alpha = 0.25) + 
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +  # 1:1 reference line
  geom_smooth(method = "lm", se = FALSE) + facet_wrap(~isevent)

Effect of (log) cumulative yield at Big G on (log) cumulative yield at little g during baseflow and event periods.

Facet by site

Code
# Same cumulative-yield gG relationship, one panel per little g site.
# se = FALSE spelled out (T/F are reassignable shorthands in R).
dat_wb2 %>% 
  mutate(isevent = dplyr::recode(isevent, "1" = "Event", "2" = "Baseflow")) %>% 
  ggplot(aes(x = yield_big_cumul_log, y = yield_little_cumul_log, group = site_name, color = site_name)) + 
  geom_point(alpha = 0.25) + 
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +  # 1:1 reference line
  geom_smooth(method = "lm", se = FALSE) + facet_wrap(~site_name) +
  theme(legend.position = "none")

Effect of (log) cumulative yield at Big G on (log) cumulative yield at little g during baseflow and event periods, faceted by site.

Gg relationship with data summarized as mean yield per event/non-event

Code
# gG relationship, mean (log) yield per window, faceted by event vs
# baseflow. se = FALSE spelled out (T/F are reassignable shorthands in R).
dat_wb2 %>% 
  mutate(isevent = dplyr::recode(isevent, "1" = "Event", "2" = "Baseflow")) %>% 
  ggplot(aes(x = yield_big_mean_log, y = yield_little_mean_log, group = site_name, color = site_name)) + 
  geom_point(alpha = 0.25) + 
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +  # 1:1 reference line
  geom_smooth(method = "lm", se = FALSE) + facet_wrap(~isevent)

Effect of (log) mean yield at Big G on (log) mean yield at little g during baseflow and event periods.

Facet by site

Code
# Same mean-yield gG relationship, one panel per little g site.
# se = FALSE spelled out (T/F are reassignable shorthands in R).
dat_wb2 %>% 
  mutate(isevent = dplyr::recode(isevent, "1" = "Event", "2" = "Baseflow")) %>% 
  ggplot(aes(x = yield_big_mean_log, y = yield_little_mean_log, group = site_name, color = site_name)) + 
  geom_point(alpha = 0.25) + 
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +  # 1:1 reference line
  geom_smooth(method = "lm", se = FALSE) + facet_wrap(~site_name) +
  theme(legend.position = "none")

Effect of (log) mean yield at Big G on (log) mean yield at little g during baseflow and event periods, faceted by site.

4.4 Summary

  • Event delineation at sub-daily (i.e., hourly) time scale is strongly affected by data processing steps (e.g., smoothing and interpolation by WMA) and landscape processes that are not the focus of this study (i.e., diel fluctuations in flow due to ET)
    • In some cases, changes in how WMA treats diel fluctuations may introduce further inconsistencies into downstream analyses
  • At the hourly timescale, temporal mismatches between big and little g time series data due to routing time are evident, typically in the 4-8 hour range for the West Brook.
    • There does not appear to be any predictable relationship between lag time and flow magnitude or time of year.
    • Because many of the event/non-event periods are very short, these mismatches may have a large effect on how reasonable it is to apply Big G events to little g data, and whether Big G events/non-events adequately encompass similar flow conditions at little g sites.
    • Temporal mismatches are less prevalent and more predictable for the daily data.
  • Inferences regarding the g~G relationship derived from hourly data generally do not match those derived from daily data.
    • the g~G relationship is more strongly affected by assumptions of temporal alignment for hourly data relative to daily data.

4.5 Dynamic time warping

Explore the use of dynamic time warping (Giorgino 2009) to align hourly time series data.

4.5.1 Select data

Trim to a restricted period because DTW cannot handle very large datasets.

Code
# Restrict both hourly series to spring 2020 — the DTW cost matrix is
# O(N*M), so the full record is too large to align.
littleg <- dat_1hr %>% filter(site_name == "Avery Brook", datetime >= as_datetime("2020-03-01 00:00:00") & datetime <= as_datetime("2020-06-01 00:00:00"))
bigg <- dat_1hr %>% filter(site_name == "West Brook NWIS", datetime >= as_datetime("2020-03-01 00:00:00") & datetime <= as_datetime("2020-06-01 00:00:00"))

# Join key spelled out rather than relying on automatic detection
mytib <- bigg %>% select(datetime, yield) %>% rename(yield_big = yield) %>% left_join(littleg %>% select(datetime, yield) %>% rename(yield_little = yield), by = "datetime")
mytib %>% dygraph() %>% dyRangeSelector() %>% dyAxis("y", label = "Yield (mm)")

4.5.2 Align data

Code
# Align little g (query) to Big G (reference) using an asymmetric step
# pattern. The original `step = asymmetric` relied on R's partial argument
# matching to reach `step.pattern` (visible in the recorded call); the
# full argument name is spelled out here.
align1hr <- dtw(x = unlist(littleg %>% select(yield)), y = unlist(bigg %>% select(yield)), step.pattern = asymmetric, keep = TRUE)
str(align1hr)
List of 20
 $ costMatrix        : num [1:2209, 1:2209] 0.00176 0.00908 0.02005 0.03084 0.04066 ...
 $ directionMatrix   : int [1:2209, 1:2209] NA 1 1 1 1 1 1 1 1 1 ...
 $ stepPattern       : 'stepPattern' num [1:6, 1:4] 1 1 2 2 3 3 1 0 1 0 ...
  ..- attr(*, "npat")= num 3
  ..- attr(*, "norm")= chr "N"
 $ N                 : int 2209
 $ M                 : int 2209
 $ call              : language dtw(x = unlist(littleg %>% select(yield)), y = unlist(bigg %>% select(yield)),      step.pattern = asymmetric, ke| __truncated__
 $ openEnd           : logi FALSE
 $ openBegin         : logi FALSE
 $ windowFunction    :function (iw, jw, ...)  
 $ jmin              : int 2209
 $ distance          : num 35.7
 $ normalizedDistance: num 0.0162
 $ index1            : num [1:2209] 1 2 3 4 5 6 7 8 9 10 ...
 $ index2            : num [1:2209] 1 3 5 7 9 11 13 13 13 13 ...
 $ index1s           : num [1:2209] 1 2 3 4 5 6 7 8 9 10 ...
 $ index2s           : num [1:2209] 1 3 5 7 9 11 13 13 13 13 ...
 $ stepsTaken        : int [1:2208] 3 3 3 3 3 3 1 1 1 1 ...
 $ localCostMatrix   : 'crossdist' num [1:2209, 1:2209] 0.00176 0.00732 0.01097 0.01079 0.00982 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:2209] "yield1" "yield2" "yield3" "yield4" ...
  .. ..$ : chr [1:2209] "yield1" "yield2" "yield3" "yield4" ...
  ..- attr(*, "method")= chr "Euclidean"
  ..- attr(*, "call")= language proxy::dist(x = x, y = y, method = dist.method)
 $ query             : num [1:2209, 1] 0.1018 0.0962 0.0926 0.0928 0.0937 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:2209] "yield1" "yield2" "yield3" "yield4" ...
  .. ..$ : NULL
 $ reference         : num [1:2209, 1] 0.1036 0.1012 0.1005 0.0982 0.096 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:2209] "yield1" "yield2" "yield3" "yield4" ...
  .. ..$ : NULL
 - attr(*, "class")= chr "dtw"

View index alignment

Code
# Three-way plot: query and reference series with the warping curve between them
plot(align1hr, type = "threeway")

Show aligned values

Code
# Two-way plot: aligned series overlaid, reference shifted down by 1 for visibility
plot(align1hr, type = "twoway", offset = - 1)

View aligned timeseries using dyGraphs. Clearly, this is not a great approach as it matches multiple query data points to the same reference index, i.e., the result is multiple little g flow readings at a single time point. As seen in the plots above, it also does not align the series correctly.

Code
# Assemble the warped series on the reference (Big G) time axis and view
# interactively. index1/index2 map query and reference observations that
# DTW paired together.
aligneddata <- tibble(
  datetime = bigg$datetime[align1hr$index2],
  query = littleg$yield[align1hr$index1],
  reference = bigg$yield[align1hr$index2]
)
aligneddata %>% dygraph() %>% dyRangeSelector() %>% dyAxis("y", label = "Yield (mm)")